The following code digs into the chronological clustering of both the community size spectrum slopes, and the mean sizes across all years of the NMFS groundfish survey.
Data is prepared and updated using {targets} to ensure a consistent data state and a reproducible workflow.
Target Data
Data for the report comes directly from the {targets} workflow found in _targets.R.
#### Data ####
#### Size Spectrum Targets
# SS and manual bins together
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(size_spectrum_indices))
size_spectrum_indices <- size_spectrum_indices %>%
mutate(season = fct_rev(season),
survey_area = factor(survey_area,
levels = c("GoM", "GB", "SNE", "MAB")),
yr = as.numeric(as.character(Year)))
#### OISST Data
# OISST for all the regions
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(regional_oisst))
#### Size Data Targets
# 1. Biological data used as input
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(nefsc_1g))
# 2. Mean sizes grouped on - yr, season, survey area, taxon group, fishery status
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_individual_sizes))
# 3. Mean sizes using same groupings as the size spectrum indices
withr::with_dir(rprojroot::find_root('_targets.R'), tar_load(mean_sizes_ss_groups))
# quick little format
nefsc_1g <- nefsc_1g %>%
mutate(fishery = case_when(
fishery == "com" ~ "Commercially Targeted",
fishery == "nc" ~ "Not Commercially Targeted",
TRUE ~ "Not Labelled"
))
# Cut up discrete length and weight bins
nefsc_size_bins <- nefsc_1g %>%
mutate(
length_bin = case_when(
LngtClass <= 5 ~ "0 - 5cm",
LngtClass <= 10 ~ "5 - 10cm",
LngtClass <= 25 ~ "10 - 25cm",
LngtClass <= 50 ~ "25 - 50cm",
LngtClass <= 100 ~ "50 - 100cm",
LngtClass >= 100 ~ ">= 100cm"),
length_bin = factor(length_bin, levels = c(
"0 - 5cm", "5 - 10cm", "10 - 25cm",
"25 - 50cm", "50 - 100cm", ">= 100cm")))
# Weight bins
nefsc_size_bins <- nefsc_size_bins %>%
mutate(
weight_bin = case_when(
ind_weight_kg <= 0.001 ~ "0 - 1g",
ind_weight_kg <= 0.005 ~ "1 - 5g",
ind_weight_kg <= 0.010 ~ "5 - 10g",
ind_weight_kg <= 0.050 ~ "10 - 50g",
ind_weight_kg <= 0.100 ~ "50 - 100g",
ind_weight_kg <= 0.500 ~ "100 - 500g",
ind_weight_kg <= 1.000 ~ ".5 - 1kg",
ind_weight_kg <= 5.000 ~ "1 - 5kg",
ind_weight_kg <= 10.00 ~ "5 - 10kg",
ind_weight_kg >= 10.00 ~ ">= 10kg" ),
weight_bin = factor(weight_bin, levels = c(
"0 - 1g", "1 - 5g", "5 - 10g", "10 - 50g",
"50 - 100g", "100 - 500g", ".5 - 1kg",
"1 - 5kg", "5 - 10kg", ">= 10kg")))
# Rename the functional groups
nefsc_size_bins <- nefsc_size_bins %>%
mutate(
spec_class = case_when(
spec_class == "gf" ~ "Groundfish",
spec_class == "pel" ~ "Pelagic",
spec_class == "dem" ~ "Demersal",
spec_class == "el" ~ "Elasmobranch",
spec_class == "<NA>" ~ "NA"
))
# Make regions go N->S
nefsc_size_bins <- nefsc_size_bins %>%
mutate(
survey_area = factor(survey_area, levels = c("GoM", "GB", "SNE", "MAB")
))
# Do some grouping
regional_totals <- nefsc_size_bins %>%
group_by(Year, survey_area) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Get fraction for different length/weight classes
# length bins
regional_lengths <- nefsc_size_bins %>%
group_by(Year, survey_area, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(stratified_sum_bodymass)) %>%
left_join(regional_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(regional_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance",
x = "",
fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(regional_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")+ theme(legend.position = "none")
# weight bins
regional_weights <- nefsc_size_bins %>%
group_by(Year, survey_area, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(regional_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(regional_weights,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Weight Bin")
# Expected Biomass for Entire Survey Area
ggplot(regional_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin") + theme(legend.position = "none")
# Do some grouping
seasonal_totals <- nefsc_size_bins %>%
group_by(Year, season) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Get fraction for different length/weight classes
# length bins
seasonal_lengths <- nefsc_size_bins %>%
group_by(Year, season, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(seasonal_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(seasonal_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(seasonal_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin") + theme(legend.position = "none")
# weight bins
seasonal_weights <- nefsc_size_bins %>%
group_by(Year, season, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(seasonal_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(seasonal_weights,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Weight Bin")
# Expected Biomass for Entire Survey Area
ggplot(seasonal_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin") + theme(legend.position = "none")
# Do some grouping
fgroup_totals <- nefsc_size_bins %>%
group_by(Year, spec_class) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Get fraction for different length/weight classes
# length bins
fgroup_lengths <- nefsc_size_bins %>%
group_by(Year, spec_class, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fgroup_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fgroup_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Length Bin")
# Expected Biomass for Entire Survey Area
ggplot(fgroup_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
# weight bins
fgroup_weigths <- nefsc_size_bins %>%
group_by(Year, spec_class, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fgroup_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fgroup_weigths,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance",
x = "",
fill = "Weight Bin")
# Expected Biomass for Entire Survey Area
ggplot(fgroup_weigths,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~spec_class) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin")
Thought it may be helpful to have a table here to check which species are currently assigned to which group.
# Display table of whichever specis fall into the categories
nefsc_size_bins %>%
distinct(SpecCode, scientific_name, spec_class) %>%
rename(`Common Name` = SpecCode,
`Scientific Name` = scientific_name,
`Functional Group` = spec_class) %>%
arrange(`Functional Group`, `Common Name`) %>%
DT::datatable(filter = "top", rownames = FALSE)
# Do some grouping
fish_totals <- nefsc_size_bins %>%
group_by(Year, fishery) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# Get fraction for different length/weight classes
# length bins
fish_lengths <- nefsc_size_bins %>%
group_by(Year, fishery, length_bin) %>%
summarise(lenbin_survey_catch = sum(Number),
lenbin_lw_bio = sum(sum_weight_kg),
lenbin_strat_abund = sum(expanded_abund_s),
lenbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fish_totals) %>%
mutate(
perc_total_catch = (lenbin_survey_catch - total_survey_catch) * 100,
perc_lw_bio = (lenbin_lw_bio - total_lw_bio) * 100,
perc_strat_abund = (lenbin_strat_abund - total_strat_abund) * 100,
perc_strat_lw_bio = (lenbin_strat_lw_bio - total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fish_lengths,
aes(Year, lenbin_strat_abund, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~fishery) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance" ,
x = "",
fill = "Length Bin")
# # Biomass from Survey using L-W
# Expected Biomass for Entire Survey Area
ggplot(fish_lengths,
aes(Year, lenbin_strat_lw_bio, fill = length_bin)) +
geom_col(position = "stack") +
facet_wrap(~fishery) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
# weight bins
fish_weigths <- nefsc_size_bins %>%
group_by(Year, fishery, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fish_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Abundance for Entire Survey Area
ggplot(fish_weigths,
aes(Year, wtbin_strat_abund, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~fishery) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Predicted Abundance - Full Survey Area",
title = "Abundance",
subtitle = "Expected Area-Stratified Abundance",
x = "",
fill = "Weight Bin")
# Expected Biomass for Entire Survey Area
ggplot(fish_weigths,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_wrap(~fishery) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Weight Bin")
Thought it may be helpful to have a table here to check which species are currently assigned to which group.
# Display table of whichever specis fall into the categories
nefsc_size_bins %>%
distinct(SpecCode, scientific_name, fishery) %>%
rename(`Common Name` = SpecCode,
`Scientific Name` = scientific_name,
`Fishery Status` = fishery) %>%
arrange(`Fishery Status`, `Common Name`) %>%
DT::datatable(filter = "top", rownames = FALSE)
Community size spectrum slopes were estimated using 2 methods for comparison, using a minimum individual biomass of 1 gram, and using area stratified abundances.
The first was using the methods of the {sizeSpectra} package which were shown to be the most accurate when using simulated data.
The second method uses binned abundances, with bins of width 0.5 on a log10 scale, so 10^0 - 10^0.5 etc. These bins were then normalized by dividing the abundances by the bin width to account for the increasing bin size.
Results from Both Methods
# Pull the group ID for the slopes grouped on year, season, and region
warmem_group_slopes <- size_spectrum_indices %>%
filter(`group ID`== "single years * season * region")
# Or just regions and years
year_region_slopes <- size_spectrum_indices %>%
filter(`group ID` == "single years * region")
Season makes it kind of tricky to understand what is going on, so as a starting point I will show the results of just single years and the data from both seasons
# Do some grouping
year_region_totals <- nefsc_size_bins %>%
group_by(Year, survey_area) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg),
.groups = "drop")
# weight bins
year_region_weights <- nefsc_size_bins %>%
group_by(Year, survey_area, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg),
.groups = "drop") %>%
left_join(year_region_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Biomass for Entire Survey Area
ggplot(year_region_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_grid(survey_area~.) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
The first lens to look at is the seasonal variation across all the different areas. This is the typical grouping that we have been focusing on.
# Do some grouping
warmem_totals <- nefsc_size_bins %>%
group_by(Year, survey_area, season) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# weight bins
warmem_weights <- nefsc_size_bins %>%
group_by(Year, survey_area, season, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(warmem_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Biomass for Entire Survey Area
ggplot(warmem_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_grid(survey_area~season) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
The second lens that I feel is important is the functional groups. This is the not the typical grouping that we have been focusing on, but clears the air on where the biomass increase is coming from.
# Do some grouping
fgroup_totals <- nefsc_size_bins %>%
group_by(Year, survey_area, spec_class) %>%
summarise(total_survey_catch = sum(Number),
total_lw_bio = sum(sum_weight_kg),
total_strat_abund = sum(expanded_abund_s),
total_strat_lw_bio = sum(sum_weight_kg)) %>%
ungroup()
# weight bins
fgroup_weights <- nefsc_size_bins %>%
group_by(Year, survey_area, spec_class, weight_bin) %>%
summarise(wtbin_survey_catch = sum(Number),
wtbin_lw_bio = sum(sum_weight_kg),
wtbin_strat_abund = sum(expanded_abund_s),
wtbin_strat_lw_bio = sum(sum_weight_kg)) %>%
left_join(fgroup_totals) %>%
mutate(
perc_total_catch = (wtbin_survey_catch / total_survey_catch) * 100,
perc_lw_bio = (wtbin_lw_bio / total_lw_bio) * 100,
perc_strat_abund = (wtbin_strat_abund / total_strat_abund) * 100,
perc_strat_lw_bio = (wtbin_strat_lw_bio / total_strat_lw_bio) * 100)
# Expected Biomass for Entire Survey Area
ggplot(fgroup_weights,
aes(Year, wtbin_strat_lw_bio, fill = weight_bin)) +
geom_col(position = "stack") +
facet_grid(spec_class~survey_area) +
scale_fill_gmri() +
scale_y_continuous(labels = comma_format()) +
labs(y = "Total Expected Biomass (kg) - Full Survey Area",
title = "Biomass",
subtitle = "Expected Area-Stratified Biomass",
x = "",
fill = "Length Bin")
The Edwards methodology differs from the other methods for estimating size spectra by avoiding the subjective decisions around how to bin data prior to fitting the log-linear regression.
Instead, Edwards uses the individual size distributions (how many individuals of any given length/weight). Abundances are totaled into discrete size bins based on expected biomass at length and length + 1, and individuals that fall within those bins are totaled to get abundances across a continuous distribution of individual bodymass.
The next difference is that the individual body size data is presumed to follow a bounded power-law distribution, with a minimum and maximum body size. Using the individual size data, maximum likelihood estimation is used to solve for the parameter (b) that minimizes the negative log-likelihood.
# Just years and regions
(ss_patterns <- year_region_slopes %>%
ggplot(aes(yr, b, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(survey_area~.) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - Edwards Method")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
cluster_dat <- year_region_slopes %>%
select(Year, survey_area, b)
# Pivot wider
ss_dat <- cluster_dat %>%
pivot_wider(names_from = c(survey_area),
values_from = b,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Function to run chronological clustering
run_chron_clust <- function(in_dat){
# Get Euclidean Distances
eucdist <- vegdist(in_dat,
method = "euclidean",
binary = FALSE,
diag = FALSE,
upper = FALSE,
na.rm = TRUE)
# Perform Chronological Clustering on distances
cl <- chclust(eucdist, method = "coniss")
# Return list
return(list(eucdist = eucdist, cl = cl))
}
# Run for sizeSpectra Results
ss_chron <- run_chron_clust(in_dat = ss_dat)
Broomstick Plot
# broken stick model
vegan::bstick(ss_chron$cl, plot=T)
Dendrogram
plot(ss_chron$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1)
title("sizeSpectra Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
ss_patterns +
geom_vline(aes(xintercept = 1984.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2008.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2003.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1975.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1973.5), color = "gray40", linetype = 2, size = 0.7)
# Plot the sizeSpectra slopes - year*region*season
(ss_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, b, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(survey_area~season) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - Edwards Method")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
cluster_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, b)
# Pivot wider
ss_dat <- cluster_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = b,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Function to run chronological clustering
run_chron_clust <- function(in_dat){
# Get Euclidean Distances
eucdist <- vegdist(in_dat,
method = "euclidean",
binary = FALSE,
diag = FALSE,
upper = FALSE,
na.rm = TRUE)
# Perform Chronological Clustering on distances
cl <- chclust(eucdist, method = "coniss")
# Return list
return(list(eucdist = eucdist, cl = cl))
}
# Run for sizeSpectra Results
ss_chron <- run_chron_clust(in_dat = ss_dat)
Broomstick Plot
# broken stick model
vegan::bstick(ss_chron$cl, plot=T)
Dendrogram
plot(ss_chron$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1)
title("sizeSpectra Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
ss_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2008.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2004.5), color = "gray40", linetype = 2, size = 0.7)
# plot trends of log10 slopes
(l10_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, l10_slope_strat, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(survey_area~season) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Slope (b)",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
l10_cluster_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_slope_strat)
# Pivot wider
l10_dat <- l10_cluster_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = l10_slope_strat,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run clustering
l10_clust <- run_chron_clust(in_dat = l10_dat)
Broomstick Plot
# broken stick model
vegan::bstick(l10_clust$cl, plot=T)
Dendrogram
plot(l10_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Size Spectrum Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
l10_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1995.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2001.5), color = "gray40", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2002.5), color = "gray40", linetype = 2, size = 0.7)
# plot trends of log10 slopes
(int_patterns <- warmem_group_slopes %>%
ggplot(aes(yr, l10_int_strat, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(survey_area~season) +
scale_color_gmri() +
labs(x = "",
y = "Size Spectrum Intercept",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
)
Prep and Clustering Code
# Reshaping
# Goal: Row for Years, columns for each different group
l10_intercept_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_int_strat)
# Pivot wider
intercept_dat <- l10_intercept_dat %>%
pivot_wider(names_from = c(season, survey_area),
values_from = l10_int_strat,
names_sep = "_") %>%
column_to_rownames(var = "Year")
# Run Clustering
l10_ints <- run_chron_clust(in_dat = intercept_dat)
Broomstick Plot
# broken stick model
vegan::bstick(l10_ints$cl, plot=T)
Dendrogram
plot(l10_ints$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("Log10 Bins Intercept - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
# Re-plot patterns with breaks
int_patterns +
geom_vline(aes(xintercept = 1987.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1996.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1995.5), color = "gray50", linetype = 2, size = 0.7) +
labs(subtitle = "Chronological Clustering Breakpoints")
For each of the slope/intercepts derived using a linear model and binned data I also pulled the adjusted r-square to get a sense of whether or not certain groups had poor fits that should be investigated.
# Reshaping
# Goal: Row for Years, columns for each different group
l10_fit_dat <- warmem_group_slopes %>%
select(Year, season, survey_area, l10_slope_strat, l10_int_strat, l10_rsqr_strat) %>%
mutate(yr = as.numeric(Year))
l10_fit_dat %>%
ggplot(aes(yr, l10_rsqr_strat, color = survey_area)) +
geom_rect(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 0.2,
fill = "gray80", color = "transparent") +
geom_hline(yintercept = 0.2, linetype = 2, size = 0.5, color = "gray50") +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(survey_area~season) +
scale_color_gmri() +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
labs(x = "",
y = "Linear Model R-Square",
color = "",
title = "Results from Area-Stratified Abundance - log10 bins")
# plot trends of log10 slopes
(sst_patterns <- regional_oisst %>%
ggplot(aes(yr, sst_anom, color = survey_area)) +
geom_line(aes(group = survey_area)) +
geom_point(alpha = 0.6) +
facet_grid(survey_area~.) +
scale_color_gmri() +
labs(x = "",
y = "Mean Temperature Anomaly",
color = "",
title = "")
)
Prep and Clustering Code
# Pivot OISST Wider
oisst_dat <- regional_oisst %>%
select(yr, survey_area, sst_anom) %>%
pivot_wider(names_from = survey_area, values_from = sst_anom) %>%
column_to_rownames(var = "yr")
# Run clustering
sst_clust <- run_chron_clust(oisst_dat)
Broomstick Plot
# broken stick model
vegan::bstick(sst_clust$cl, plot=T)
Dendrogram
plot(sst_clust$cl,
hang = -0.1,
axes = FALSE,
cex = 0.8,
horiz = F)
axis(side = 2, cex.axis = 1.3)
title("SST Anomalies - Clustering Metrics", cex = 1)
mtext(side = 2, line = 2.3, "Sum of squares", cex = 1, las = 0)
# Re-plot patterns with breaks
sst_patterns +
geom_vline(aes(xintercept = 2011.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 1998.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2002.5), color = "gray50", linetype = 2, size = 0.7) +
geom_vline(aes(xintercept = 2005.5), color = "gray50", linetype = 2, size = 0.7)
This section seeks to track different aspects of the biomass distribution during each of the breaks determined from the break point analyses.
Primarily 1. How is biomass changing for the different functional groups
2. What is the relative balance of biomass across functional groups, and within them, size bins
# Make groups based on breaks
edwards_breaks <- list("1970-1987" = 1970:1987,
"1988-2008" = 1988:2008,
"2009-2019" = 2009:2019)
temp_breaks <- list("1981-1998" = 1981:1998,
"1999-2002" = 1999:2002,
"2003-2005" = 2003:2005,
"2006-2011" = 2006:2011,
"2012-2021" = 2012:2021)
## label the edwards bins
nefsc_size_bins <- imap_dfr(edwards_breaks, function(x,y){
nefsc_size_bins %>%
filter(Year %in% x) %>%
mutate(e_break = y,
e_break = factor(e_break, levels = names(edwards_breaks)))
}) %>% arrange(Year)
# label the temp bins
nefsc_size_bins <- imap_dfr(temp_breaks, function(x,y){
nefsc_size_bins %>%
filter(Year %in% x) %>%
mutate(t_break = y,
t_break = factor(t_break, levels = names(temp_breaks)))
}) %>% arrange(Year)
# Track how things are changing across slope exponent breaks
functional_group_splits <- nefsc_size_bins %>%
split(.$e_break) %>%
imap(function(e_data, label){
# 1. How is biomass trending within each functional group
# Get that yearly summary
yearly_data <- e_data %>%
group_by(Year, spec_class) %>%
summarise(
total_strat_biom = sum(expanded_lwbio_s),
avg_ind_mass = weighted.mean(ind_weight_kg, Number),
avg_ind_len = weighted.mean(LngtClass, Number),
.groups = "drop")
# run linear models
biom_trend_lm <- broom::tidy(lm(total_strat_biom ~ Year,
data = yearly_data))
length_trend_lm <- broom::tidy(lm(avg_ind_len ~ Year,
data = yearly_data))
imass_trend_lm <- broom::tidy(lm(avg_ind_mass ~ Year,
data = yearly_data))
# report direction
biom_slope <- biom_trend_lm[[2,"estimate"]]
len_slope <- length_trend_lm[[2,"estimate"]]
imass_slope <- imass_trend_lm[[2,"estimate"]]
biom_direction <- ifelse(biom_slope > 0, "increase", "decrease")
len_direction <- ifelse(len_slope > 0, "increase", "decrease")
imass_direction <- ifelse(imass_slope > 0, "increase", "decrease")
# report significance
biom_sig <- ifelse(biom_trend_lm[[2,"p.value"]] < 0.05, "significant", "not")
len_sig <- ifelse(length_trend_lm[[2,"p.value"]] < 0.05, "significant", "not")
imass_sig <- ifelse(imass_trend_lm[[2,"p.value"]] < 0.05, "significant", "not")
# make table
trend_table <- data.frame("ebreak" = rep(label, 3),
"data_source" = c("stratified_biomass", "avg_length", "avg_mass"),
"trend" = c(biom_direction, len_direction, imass_direction),
"rate" = c(biom_slope, len_slope, imass_slope),
"significance" = c(biom_sig, len_sig, imass_sig))
# 2. Relative Biomass
total_biom <- sum(e_data$expanded_lwbio_s)
relative_biom <- e_data %>%
group_by(spec_class) %>%
summarise(n_years = n_distinct(Year),
group_biom_total = sum(expanded_lwbio_s),
avg_ann_biom = group_biom_total / n_years,
biom_frac = group_biom_total / total_biom,
avg_ind_mass = weighted.mean(ind_weight_kg, Number),
avg_ind_len = weighted.mean(LngtClass, Number),
.groups = "drop") %>%
mutate(ebreak = label)
# return tables
return(list(#"biom_lm" = biom_trend_lm,
#"len_lm" = length_trend_lm,
"trends" = trend_table,
"rel_bio" = relative_biom))
})
# Pull out trends as a table
fgroup_trends <- map_dfr(functional_group_splits, ~ .x[["trends"]])
# Pull out relative biomass as a table
fgroup_rel_bio <- map_dfr(functional_group_splits, ~ .x[["rel_bio"]])
For each breakpoint period a linear regression was used to determine whether there was a net increase/decrease within that period. The results from all significant regressions (none) are reported below.
fgroup_trends %>% filter(significance == "significant")
[1] ebreak data_source trend rate significance
<0 rows> (or 0-length row.names)
The following figure tracks the relative distribution of catch across the different functional groups.
fgroup_rel_bio %>%
ggplot(aes(ebreak, y = biom_frac, fill = spec_class)) +
geom_col(position = "dodge") +
scale_fill_gmri() +
labs(x = "Stanzas from ISD Exponent Breakpoint Analysis",
y = "Relative Proportion of Stratified Biomass",
fill = "")
This figure visualizes the avarage total biomass (per year) for each functional group within the different size spectrum stanzas.
fgroup_rel_bio %>%
ggplot(aes(ebreak, y = avg_ann_biom/1e6, fill = spec_class)) +
geom_col(position = "dodge") +
scale_fill_gmri() +
labs(x = "Stanzas from ISD Exponent Breakpoint Analysis",
y = "Avg. Annual Biomass (million kg)",
fill = "")
This figure visualizes the average individual bodymass for each functional group within the different size spectrum stanzas.
fgroup_rel_bio %>%
ggplot(aes(ebreak, y = avg_ind_mass, fill = spec_class)) +
geom_col(position = "dodge") +
scale_fill_gmri() +
labs(x = "Stanzas from ISD Exponent Breakpoint Analysis",
y = "Average Individual Bodymass (kg)",
fill = "")
This figure does the same, but for individual length, not bodymass.
fgroup_rel_bio %>%
ggplot(aes(ebreak, y = avg_ind_len, fill = spec_class)) +
geom_col(position = "dodge") +
scale_fill_gmri() +
labs(x = "Stanzas from ISD Exponent Breakpoint Analysis",
y = "Average Individual Length (cm)",
fill = "")
After seeing no large difference in the things above that we expected to see, I plotted the distribution of size spectrum slopes for each stanza. Lot of within gorup variation, not much across group variation.
warmem_group_slopes <- imap_dfr(edwards_breaks, function(x,y){
warmem_group_slopes %>%
filter(Year %in% x) %>%
mutate(e_break = y,
e_break = factor(e_break, levels = names(edwards_breaks)))
}) %>% arrange(Year)
warmem_group_slopes %>%
ggplot(aes(x = e_break, y = b, color = e_break)) +
scale_color_gmri() +
geom_jitter(width = 0.1) +
labs(x = "Stanzas from ISD Exponent Breakpoint Analysis",
y = "Individual Size Distribution Exponent",
color = "")
A work by Adam A. Kemberling
Akemberling@gmri.org